package org.example;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;

/**
 * Spark Core example: reads {@code input/user.csv} line by line, drops empty lines and prints
 * the remaining lines to standard output.
 */
public class Test05_CSVFile {

    /**
     * Runs the example job.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {

        // 1. Create the configuration object.
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("sparkCore");

        // 2. Create the Spark context. try-with-resources guarantees the context is closed
        //    even when reading or collecting the data throws.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {

            // 3. Load the file as an RDD of text lines.
            JavaRDD<String> rdd = sc.textFile("input/user.csv");

            /*
             * Intended filtering, per the original notes:
             *   - empty lines
             *   - rows with missing fields
             *   - the header row (first line)
             * NOTE(review): only the empty-line check is implemented here; rows with missing
             * fields and the header row still pass through.
             */
            Function<String, Boolean> isNotEmpty = line -> !"".equals(line);
            JavaRDD<String> filterRDD = rdd.filter(isNotEmpty);

            filterRDD.collect().forEach(System.out::println);
        }
        // 4. The context is closed automatically when the try block exits.
    }
}